/* "Efficiency and water use: Dynamic effects of irrigation technology adoption
by Micah Cameron-Harp and Nathan Hendricks

Code written by Micah Cameron-Harp
May 16th, 2024

This do file creates the water right group level dataset. Subsets of this
dataset are used in all analyses within our work. 

This file adds PRISM and SSURGO variables specific to each wuadet_key to the WIMAS 
	data. The WIMAS database contains the information on irrigation behavior
	such as acre-feet withdrawn, acres irrigated, and irrigation technology. 
	WUADET_KEY's uniquely identify observations in the WIMAS database.
	PRISM variables for each wuadet_key contained in "data\raw\prism_monthly.dta"
	SSURGO variables for each wuadet_key contained in "data\raw\ssurgo.dta" 
	WIMAS data for 1991 to 2019, with no duplicate wuadet_key values,
		contained in "data\raw\wimas_1991_2021.dta"
	Pre-development aquifer characteristics containined 
		in "data\raw\predev/hycond_trs.dta"
	Place of Use data is contained in "data\raw\wimas_pu.dta"
	The key linking water right groups to wuadet_key values is	
		contained in "data\raw\wrg_key.dta"
*/

/* NOTE - Global macros for directories are created in the main_text_results.do 
	file which calls this individual .do file */

*Create log file
log using "$dr_output_log\prepare_wrg_dataset", replace

*Bring in WIMAS data for 1991-2019
use "$dr_data\raw\wimas_1991_2019.dta", clear
	*Keep only records for which the use made of water is irrigation
	keep if umw_code=="IRR"

*Merge with SSURGO
merge 1:1 wuadet_key using "$dr_data\raw\ssurgo.dta", generate(ssurgo_merge)

*Merge with PRISM
merge 1:1 wuadet_key using "$dr_data\raw\prism_monthly.dta", generate(prism_merge)

/* Construct additional variables using PRISM and WIMAS variables specific at the wuadet_key level
		NOTE - WUADET_KEY currently uniquely identifies records in this dataset. 
*/

********************************************************************************
***************** Destring some WIMAS variables ********************************
********************************************************************************
	*Destring some WIMAS variables 
	foreach var of varlist type_system acres_irr dpth_water dpth_well gmd {
		destring `var', replace 
	}

********************************************************************************
***************** Create irrigation specific variables  ***************
********************************************************************************
*Create acre-feet used for irrigation variable
	gen af_used_irr = af_used if umw_code=="IRR"

*Create crop specific acreage variables 
	*Start with corn
	gen corn_acres = acres_irr if crop_code==2
		replace corn_acres = .5*acres_irr if crop_code==18 | inrange(crop_code, 23, 26)
		replace corn_acres = (1/3)*acres_irr if inrange(crop_code, 33,36) | ///
			inrange(crop_code, 43,48)
		replace corn_acres = (1/4)*acres_irr if inrange(crop_code, 53, 58) | ///
			inrange(crop_code, 63,66) | inrange(crop_code, 68, 69)
		replace corn_acres = (1/5)*acres_irr if inrange(crop_code, 70, 71) | inrange(crop_code, 73, 74)

	*Now do soybeans hist 
	gen soy_acres = acres_irr if crop_code==4
		replace soy_acres = .5*acres_irr if crop_code==20 | crop_code==24 | ///
			crop_code==27 | inrange(crop_code, 30, 31)
		replace soy_acres = (1/3)*acres_irr if crop_code==34 | crop_code==37 | ///
			inrange(crop_code, 40,41) | crop_code==43 | inrange(crop_code, 46,47) | ///
			inrange(crop_code, 49,50) | crop_code==52
		replace soy_acres = (1/4)*acres_irr if crop_code==53 | ///
			inrange(crop_code, 56,57) | inrange(crop_code, 59,60) | ///
			inrange(crop_code, 62,64) | inrange(crop_code, 66,68)
		replace soy_acres = (1/5)*acres_irr if inrange(crop_code, 71, 74)
		
		*Now do wheat
	gen wheat_acres = acres_irr if crop_code==5
		replace wheat_acres = .5*acres_irr if crop_code==21 | crop_code==25 | ///
			crop_code==28 | crop_code==30 | crop_code==32
		replace wheat_acres = (1/3)*acres_irr if crop_code==35 | crop_code==38 | ///
			crop_code==40 | crop_code==42 | crop_code==44 | crop_code==46 | ///
			inrange(crop_code, 48, 49) | crop_code==51 | crop_code==52
		replace wheat_acres = (1/4)*acres_irr if crop_code==54 | ///
			inrange(crop_code, 59, 61) | inrange(crop_code, 61,63) | ///
			inrange(crop_code, 65, 67) | crop_code==69
		replace wheat_acres = (1/5)*acres_irr if inrange(crop_code, 70, 74)
		
	*Now do sorghum
	gen sorghum_acres = acres_irr if crop_code==3
		replace sorghum_acres = .5*acres_irr if crop_code==19 | crop_code==23 | ///
			crop_code==27 | crop_code==28 | crop_code==29
		replace sorghum_acres = (1/3)*acres_irr if crop_code==33 | inrange(crop_code, 37, 39) | ///
			inrange(crop_code, 43, 45) | inrange(crop_code, 49, 51) 
		replace sorghum_acres = (1/4)*acres_irr if inrange(crop_code, 53, 55) | ///
			crop_code==56 | inrange(crop_code, 58,59) | inrange(crop_code, 63,65) | ///
			inrange(crop_code, 67, 69)
		replace sorghum_acres = (1/5)*acres_irr if crop_code==70 | inrange(crop_code, 72, 74)
		
	*Now do alfalfa
	gen alfalfa_acres = acres_irr if crop_code==1
		replace alfalfa_acres = .5*acres_irr if inrange(crop_code, 18, 22) 
		replace alfalfa_acres = (1/3)*acres_irr if inrange(crop_code, 33, 42)
		replace alfalfa_acres = (1/4)*acres_irr if inrange(crop_code, 53, 62) | inrange(crop_code, 68, 69)
		replace alfalfa_acres = (1/5)*acres_irr if inrange(crop_code, 70, 72)
		replace alfalfa_acres = (1/6)*acres_irr if crop_code==74

*Create irrigation technology specific acreage variables. Only do flood, cp, and LEPA
	*Start with flood irrigated acres
	gen flood_acres = acres_irr if type_system==1	
	*Create center-pivot irrigated acres, do not include "CP and flood" option (type_system==6)
	gen cp_acres = acres_irr if type_system==3
	*Now do center pivot with LEPA nozzles 
	gen lepa_acres = acres_irr if type_system==4
		
********************************************************************************
****************** PRISM and SSURGO ********************************************
********************************************************************************

***********************  Generate evapotranspiration  **************************
		*Change latitude to radians 
		replace latitude = latitude*_pi/180
		* G, u, gamma
		local G 0
		local u 2
		
		* gamma with elevation from gSSURGO
		gen gamma_elev=0.000665*101.3*((293-0.0065*elev)/293)^5.26
		
		************
		* Radiation
		************
		* extraterrestrial radiation (ra) 
		gen base_date=mdy(1,1,wua_year)
		local month_num = 1
		local month_abbrevs "jan feb mar apr may jun jul aug sep oct nov dec"
		foreach month of local month_abbrevs {
			* Generate day of the year (J)
			gen `month'_current_date=mdy(`month_num', 15, wua_year)
			gen `month'_J = `month'_current_date - base_date + 1
			
			* radiation
			gen `month'_dr= 1 + 0.033*cos(2*_pi*`month'_J/365)
			gen `month'_delta= 0.409*sin(2*_pi*`month'_J/365-1.39)
			gen `month'_pre_omega=-tan(latitude)*tan(`month'_delta)
			/*This is where the problem is. Arccossign is only defined [-1,1] */
			gen `month'_omega=acos(`month'_pre_omega)
			
			* Average temperature
			gen `month'_tavg = (`month'_tmin + `month'_tmax)/2
			
			* vapour pressure deficit
			gen `month'_vpd=0.5*0.6108*(exp(17.27*`month'_tmax/(`month'_tmax+237.3))-exp(17.27*`month'_tmin/(`month'_tmin+237.3)))
			
			* slope of vapour pressure curve
			gen `month'_Delta=(4098*0.6108*exp(17.27*`month'_tavg/(`month'_tavg+237.3)))/((`month'_tavg+237.3)^2)
			
			* RA in mm/day
			gen `month'_Ra=24*60/_pi*0.0820*`month'_dr*(`month'_omega*sin(latitude)*sin(`month'_delta) + cos(latitude)*cos(`month'_delta)*sin(`month'_omega))

			gen `month'_Rns=(1-0.23)*0.16*(`month'_tmax-`month'_tmin)^(1/2)*`month'_Ra
			gen `month'_Rnl=4.903*10^-9*(((`month'_tmax+273.16)^4+(`month'_tmin+273.16)^4)/2)* ///
				(0.34-0.14*(0.6108*exp(17.27*`month'_tmin/(`month'_tmin+237.3)))^(1/2))* ///
				(1.35*(0.16*(`month'_tmax-`month'_tmin)^(1/2)*`month'_Ra)/((0.75+2*10^-5)*`month'_Ra)-0.35)
			gen `month'_Rn=`month'_Rns-`month'_Rnl

			gen `month'_ET0_elev=(0.408*`month'_Delta*(`month'_Rn-`G')+gamma_elev*900/(`month'_tavg+273)*`u'*`month'_vpd)/ ///
				(`month'_Delta+gamma_elev*(1+0.34*`u'))
			gen `month'_ET0_Har=0.0023*(`month'_tavg+17.8)*(`month'_tmax-`month'_tmin)^0.5*0.408*`month'_Ra	
	local ++month_num
		}
		*Drop excess variables
		drop *_current_date *_J *_dr *_delta *_omega *_tavg *_vpd *_Delta *_Ra *_Rns *_Rnl *_Rn gamma_elev base_date
	
********************************************************************************
/* Create pre-season (January to April) growing-season (May to September) PRISM variables and evapotranspiration*/
local prism_vars "ppt tmin tmax ET0_elev ET0_Har"
	foreach pvar of local prism_vars  {
	local preseason_`pvar'_vars "jan_`pvar' feb_`pvar' mar_`pvar' apr_`pvar'"
	egen jan_april_mean_`pvar' = rmean(`preseason_`pvar'_vars')
	local growseason_`pvar'_vars "may_`pvar' jun_`pvar' jul_`pvar' aug_`pvar' sep_`pvar'"
	egen may_sep_mean_`pvar' = rmean(`growseason_`pvar'_vars')
}

*Make total pre-season (January to April) growing-season (May to September) precipitation and evapotranspiration
gen total_preseason_ppt = jan_ppt + feb_ppt + mar_ppt + apr_ppt
gen total_growseason_ppt = may_ppt + jun_ppt + jul_ppt + aug_ppt + sep_ppt

local prism_vars "ET0_elev ET0_Har"
	foreach pvar of local prism_vars  {
		gen total_preseason_`pvar' = 31*jan_`pvar' + 28*feb_`pvar' + 31*mar_`pvar' + 30*apr_`pvar'
		gen total_growseason_`pvar' = 31*may_`pvar' + 30*jun_`pvar' + 31*jul_`pvar' + 31*aug_`pvar' + 30*sep_`pvar'
	}

/* Save intermediate dataset so we can merge it to the key linking wuadet_key's 
to water right groups (WR_GROUP) */
save "$dr_temp\wuadet_level.dta", replace


********************************************************************************
********** Aggregate data to the water right group level ***********************
********************************************************************************
/*
Step 1 - Take the water right group key linking water rights groups with wuadet_key's and then
merge in the WIMAS data with PRISM and SSURGO variables by the wuadet_key's. 
Step 2 - Then collapse the data, as each wuadet_key appears only once, at the WR_GROUP level
to get the aggregate water use and acres irrigated within a water right group.
*/
use "$dr_data\raw\wrg_key.dta", clear
keep WR_GROUP wuadet_key 
duplicates drop

*Step 1 - merge with wimas
merge 1:1 wuadet_key using "$dr_temp\wuadet_level.dta", generate(wimas_merge),
keep if wimas_merge==3
drop wimas_merge
drop *_merge
	*Drop wuadet_key files that do not have irrigation as the use made of water
	drop if umw_code!="IRR"
	*Set type_system=. if af_used_irr==0 & acres_irr==0
	drop if af_used_irr>0 & acres_irr==0
	drop if acres_irr>0 & af_used_irr==0
	replace type_system=. if af_used_irr==0 & acres_irr==0

*Step 2 - collapse at the WR_GROUP level
	*First tabulate necessary categorical variables
	foreach var of varlist type_system crop_code county gmd umw_code {
		tab `var', generate(`var'_)
		drop `var'
	}

	*Collapse 
		collapse (sum) af_used af_used_irr acres_irr *_acres ///
				 (mean) hyd_cond sy predev_dtw predev_sat dpth_water dpth_well ///
					longitude latitude slope-musumcpct jan_tmin-dec_ppt ///
					jan_ET0_elev-total_growseason_ET0_Har type_system* ///
					crop_code* county* gmd* umw_code*, by(WR_GROUP wua_year)
				
*Step 3 - Merge authorized quantites and authorized acreage for water right groups
	*Start with authorized quantities
	merge m:1 WR_GROUP using "$dr_data\raw\wrg_auth_quant.dta", generate(auth_quant_merge)
		drop if auth_quant_merge==2
	merge m:1 WR_GROUP using "$dr_data\raw\wrg_auth_acres.dta", generate(auth_acres_merge)
		drop if auth_acres_merge==2
	drop auth_quant_merge auth_acres_merge
	
*Step 4 - Make ssurgo and predevelopment aquifer variables time invariant
	foreach var of varlist hyd_cond sy predev_dtw predev_sat sandtotal silttotal awc slope {
		bysort WR_GROUP: egen mean_`var' = mean(`var')
		replace `var' = mean_`var'
		drop mean_`var'
	}

*Save
	save "$dr_temp\wrg_collapsed.dta", replace
	save "$dr_data\final\wrg_collapsed.dta", replace
	
*Close log 
	log close